In [17]:
from livelossplot import PlotLossesKeras
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.model_selection import GroupShuffleSplit
from IPython.display import display # Allows the use of display() for DataFrames
import matplotlib.pyplot as plt
#import seaborn as sns
import cv2
import random
import shutil
import pathlib

%matplotlib inline

np.random.seed(100)

Prepare data

In [18]:
df = pd.read_csv("train.csv", header = 0)
display(df.std())

total_images = len(df)

# drop rows with new_whale because it is used to label various unknown flukes yet
#df = df[df.Id != 'new_whale']

# use targets with 3+ samples
df = df.groupby("Id").filter(lambda x: len(x) >= 3)

# Use top N targets by image count
value_counts = df.Id.value_counts()
top_hitters = value_counts.nlargest(10).index
df = df[df['Id'].isin(top_hitters)]

fig, ax = plt.subplots()
ax.set_yscale('log')
ax.set_ylabel("Whales")
ax.set_xlabel("Images per whale")
value_counts.hist(ax=ax,figsize=(20,5),bins=10, bottom=1)

classes = df.Id.unique()
num_classes = len(classes)

X_train = []; y_train = []

X_test = []; y_test = []

X_valid = []; y_valid = []

for whale_id in classes:
    df_whale = df[df.Id == whale_id]
    
    X_whale = np.array([os.path.join(os.getcwd(), 'train', s) for s in df_whale.Image])
    y_whale = np.array(df_whale.Id.values)
    
    X_train_whale, X_test_whale, y_train_whale, y_test_whale = \
        train_test_split(X_whale, y_whale, test_size=0.2, random_state=1)
    X_test.extend(X_test_whale)
    y_test.extend(y_test_whale)
    
    X_train_whale, X_valid_whale, y_train_whale, y_valid_whale = \
        train_test_split(X_train_whale, y_train_whale, test_size=0.2, random_state=1)
    X_train.extend(X_train_whale)
    y_train.extend(y_train_whale)
    X_valid.extend(X_valid_whale)
    y_valid.extend(y_valid_whale)

print('\nThere are %d total images.' % total_images)
print("Trainable...")
print('There are %d total classes.' % num_classes)

print('There are %d training images.' % len(X_train))
print('There are %d validation images.' % len(X_valid))
print('There are %d test images.' % len(X_test))

print("After data filtering")

df.describe()

print(classes)
workers = 8
batch_size = 32
Series([], dtype: float64)
There are 9850 total images.
Trainable...
There are 10 total classes.
There are 648 training images.
There are 169 validation images.
There are 211 test images.
After data filtering
['w_1287fbc' 'new_whale' 'w_fd1cb9d' 'w_73d5489' 'w_7554f44' 'w_1eafe46'
 'w_98baff9' 'w_693c9ee' 'w_43be268' 'w_ab4cae2']

Visualize 12 random training images

In [19]:
def visualize_img(img_path, ax):
    img = cv2.imread(img_path)
    ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    
fig = plt.figure(figsize=(50, 30))

num_images = 12
rand_images = random.sample(X_train, num_images)

for i, file in enumerate(rand_images):
    ax = fig.add_subplot(3, 4, i + 1, xticks=[], yticks=[])
    visualize_img(file, ax)

Visualize random whale from top hitters

In [20]:
random_whale = random.sample(list(top_hitters),1)
print(random_whale)

whale_images = list(df[df['Id'].isin(random_whale)]['Image'])
files = [s for s in X_train if any(w in s for w in whale_images)]

fig = plt.figure(figsize=(50, 30))

for i, file in enumerate(files[:12]):
    ax = fig.add_subplot(3, 4, i + 1, xticks=[], yticks=[])
    visualize_img(file, ax)
['w_43be268']

Prepare Dataset for ImageDataGenerator

In [21]:
def copy_class_of_files(files, dst, labels):
    for idx, val in enumerate(files):
        dst_dir = os.path.join(dst, labels[idx])
        pathlib.Path(dst_dir).mkdir(parents=True, exist_ok=True)
        shutil.copy(val, dst_dir)
        
shutil.rmtree('./data', ignore_errors=True)
copy_class_of_files(X_train, 'data/train', y_train)
copy_class_of_files(X_valid, 'data/valid', y_valid)
copy_class_of_files(X_test, 'data/test', y_test)

Load CNN without top layer

In [22]:
#from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.models import Model

#define VGG19 model
image_side_size = 150
image_dim = (image_side_size, image_side_size, 3)
base_model = VGG19(weights='imagenet', include_top=False,
                          input_tensor=None, input_shape=image_dim, 
                          pooling=None)
cut_off_layer = base_model.get_layer('block4_pool')

# define InceptionResNetV2 model
# image_side_size = 299
# image_dim = (image_side_size, image_side_size, 3)
# base_model = InceptionResNetV2(weights='imagenet', include_top=False,
#                           input_tensor=None, input_shape=image_dim, 
#                           pooling=None)
# cut_off_layer = base_model.get_layer('mixed_7a')

print(f"Layers {len(base_model.layers)}")

# freeze weights in all layers of the base model
for layer in base_model.layers:
    layer.trainable = False

print("Complete model before trimming")
base_model.summary()


# https://github.com/keras-team/keras/issues/8909#issuecomment-354406145
# https://github.com/keras-team/keras/issues/2371
base_model = Model(base_model.input, cut_off_layer.output)

print(f'Last base layer: {base_model.output}')

print("Trimmed model")
base_model.summary()
Layers 22
Complete model before trimming
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_2 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 37, 37, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 37, 37, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 37, 37, 256)       590080    
_________________________________________________________________
block3_conv4 (Conv2D)        (None, 37, 37, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 18, 18, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 18, 18, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 18, 18, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 18, 18, 512)       2359808   
_________________________________________________________________
block4_conv4 (Conv2D)        (None, 18, 18, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 9, 9, 512)         0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 9, 9, 512)         2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 9, 9, 512)         2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 9, 9, 512)         2359808   
_________________________________________________________________
block5_conv4 (Conv2D)        (None, 9, 9, 512)         2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 4, 4, 512)         0         
=================================================================
Total params: 20,024,384
Trainable params: 0
Non-trainable params: 20,024,384
_________________________________________________________________
Last base layer: Tensor("block4_pool_1/MaxPool:0", shape=(?, 9, 9, 512), dtype=float32)
Trimmed model
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_2 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 37, 37, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 37, 37, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 37, 37, 256)       590080    
_________________________________________________________________
block3_conv4 (Conv2D)        (None, 37, 37, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 18, 18, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 18, 18, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 18, 18, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 18, 18, 512)       2359808   
_________________________________________________________________
block4_conv4 (Conv2D)        (None, 18, 18, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 9, 9, 512)         0         
=================================================================
Total params: 10,585,152
Trainable params: 0
Non-trainable params: 10,585,152
_________________________________________________________________

Top layer specific to our problem

In [23]:
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D

# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.2)(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.2)(x)

predictions = Dense(num_classes, activation='softmax')(x)
# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

Compile model

In [24]:
from keras.optimizers import RMSprop

model.compile(loss='categorical_crossentropy', 
              optimizer=RMSprop(lr=0.0001), 
              metrics=['accuracy'])

Define image generators

In [25]:
from keras.preprocessing.image import ImageDataGenerator

target_size = (image_side_size, image_side_size)

train_datagen = ImageDataGenerator(
        rescale=1./255,
        horizontal_flip=True,
        vertical_flip=True,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        zoom_range=0.2,
        shear_range=0.2,
        fill_mode='nearest')

valid_datagen = ImageDataGenerator(rescale=1./255)

print("Train generator")
train_generator = train_datagen.flow_from_directory(
        'data/train',
        target_size = target_size,
        class_mode = 'categorical',
        batch_size = batch_size,
        shuffle=True)

print("Valid generator")
valid_generator = valid_datagen.flow_from_directory(
        'data/valid',
        target_size = target_size,
        class_mode = 'categorical',
        batch_size = batch_size)
Train generator
Found 648 images belonging to 10 classes.
Valid generator
Found 169 images belonging to 10 classes.
In [26]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau
from time import time

best_weights_path=f'whale.flukes.{num_classes}_classes.weights.best.hdf5'

# train the model
checkpointer = ModelCheckpoint(filepath=best_weights_path, verbose=1, save_best_only=True)

# Stop the training if the model shows no improvement 
stopper = EarlyStopping(monitor='val_loss', 
                        min_delta=0.005, 
                        patience=100, 
                        verbose=1, 
                        mode='auto')

# Reduce learning rate when a metric has stopped improving.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', 
                             factor=0.7,
                             patience=20,
                             min_lr=0.0000001,
                             verbose=1)

tensorboard = TensorBoard(log_dir="./logs/final/{}".format(time()),
                          batch_size=batch_size,
                          write_graph=True,
                          write_images=True,
                          histogram_freq=0,
                          write_grads=True)

model.fit_generator(generator=train_generator,
                    epochs=1000,
                    steps_per_epoch = len(X_train)//batch_size,
                    validation_data=valid_generator, 
                    validation_steps = len(X_valid)//batch_size,
                    callbacks=[checkpointer, stopper, reduce_lr,
                               PlotLossesKeras(), 
                               tensorboard],
                    verbose=1, workers=workers)
Epoch 00168: early stopping
Out[26]:
<keras.callbacks.History at 0x125fe6ef0>

Load the Model with the Best Validation Loss

In [27]:
model.load_weights(best_weights_path)

Calculate Classification Accuracy on Test Set

In [28]:
test_datagen = ImageDataGenerator(rescale=1. / 255)

test_generator = test_datagen.flow_from_directory(
    "data/test",
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical', shuffle=False)

score = model.evaluate_generator(
    generator=test_generator, 
    workers=workers)

print("Loss: ", score[0], "Accuracy: ", score[1])

scores = model.predict_generator(
    generator=test_generator, 
    workers=workers)
Found 211 images belonging to 10 classes.
Loss:  0.6949827727548319 Accuracy:  0.7962085296757413

Print confusion matrix

In [29]:
from sklearn.metrics import confusion_matrix
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

cnf_matrix = confusion_matrix(test_generator.classes, list(map(lambda x: np.argmax(x), scores)))
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure(figsize=(80,20))
plot_confusion_matrix(cnf_matrix, classes=classes, title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure(figsize=(80,20))
plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True, title='Normalized confusion matrix')

plt.show()
Confusion matrix, without normalization
Normalized confusion matrix

Write classification result

In [30]:
import csv
import ntpath

with open('predictions.csv', 'w') as csvfile:
    fieldnames = ['Image', 'Id']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    
    for idx, file_name in enumerate(X_test):
        probs = scores[idx]
        _, sorted_classes = zip(*sorted(zip(probs, classes), reverse=True))
        writer.writerow(dict(zip(fieldnames, [ntpath.basename(file_name), ' '.join(sorted_classes[:3])])))